knitr::opts_knit$set(root.dir='../fwsw_results/')
source("../../gwscaR/R/gwscaR.R")
source("../../gwscaR/R/gwscaR_plot.R")
source("../../gwscaR/R/gwscaR_utility.R")
source("../../gwscaR/R/gwscaR_fsts.R")
source("../../gwscaR/R/gwscaR_popgen.R")
library(scales)
This analysis supplements that found in fwsw_analysis.R, which runs the actual analysis of the dataset. Here, I compare the Fst outliers and delta-divergence outliers to putative genes identified from other studies of teleosts adapting to freshwater environments.
#putative genes
put.genes<-read.delim("putative_genes.txt",header=TRUE,sep='\t')
#genome annotations
gff.name<-"ssc_2016_12_20_chromlevel.gff.gz"
if(length(grep("gz",gff.name))>0){
gff<-read.delim(gzfile(paste("../../scovelli_genome/",gff.name,sep="")),header=F)
} else{
gff<-read.delim(paste("../../scovelli_genome/",gff.name,sep=""),header=F)
}
colnames(gff)<-c("seqname","source","feature","start","end","score","strand","frame","attribute")
#Stacks output
vcf<-parse.vcf("p4.upd.vcf")
vcf$SNP<-paste(vcf$`#CHROM`,vcf$POS,sep=".")
scaffs<-levels(as.factor(vcf[,1]))
fwsw.tx<-read.delim("stacks/p4.fst_TXCC-TXFW.txt")
fwsw.la<-read.delim("stacks/p4.fst_ALST-LAFW.txt")
fwsw.al<-read.delim("stacks/p4.fst_ALFW-ALST.txt")
fwsw.fl<-read.delim("stacks/p4.fst_FLCC-FLLG.txt")
stacks.sig<-read.delim("p4.stacks.sig.snps.txt")
#annotated Stacks outliers at 0.01
fw.sig.reg<-read.csv("p4.StacksFWSWOutliers_annotatedByGenome.csv")
#deltad
deltad<-read.delim("sep.deltadivergence.txt")
sdd.out<-read.delim("sep.smoothedDD.out.txt",header=T,sep='\t')
#josts d
jostd<-read.delim("p4.jostd.perlocus.txt",header=F)
colnames(jostd)<-c("locid","D")
jostd$SNP<-vcf$SNP
jostd$POS<-vcf$POS
jostd$Chr<-vcf$`#CHROM`
jostd$ID<-vcf$ID
#ld info
ld<-read.delim("p4.ld_info_fwsw.txt")
#set up some helfpul things
fw.list<-c("TXFW","LAFW","ALFW","FLLG")
sw.list<-c("TXSP","TXCC","TXCB","ALST","FLSG","FLKB",
"FLFD","FLSI","FLAB","FLPB","FLHB","FLCC")
lgs<-c("LG1","LG2","LG3","LG4","LG5","LG6","LG7","LG8","LG9","LG10","LG11",
"LG12","LG13","LG14","LG15","LG16","LG17","LG18","LG19","LG20","LG21",
"LG22")
lgn<-seq(1,22)
all.colors<-c(rep("black",2),"#2166ac","black","#2166ac","black","#2166ac",
rep("black",8),"#2166ac")
grp.colors<-c('#762a83','#af8dc3','#e7d4e8','#d9f0d3','#7fbf7b','#1b7837')
These are updated from fwsw_analysis.R and are specific to these analyses, and are not generalizable!
ssc.fwsw.fstsig<-function(tx, la, al, fl, cutoff)
{
tx.sig<-tx[tx$Fisher.s.P<cutoff,"Locus.ID"]
la.sig<-la[la$Fisher.s.P<cutoff,"Locus.ID"]
al.sig<-al[al$Fisher.s.P<cutoff,"Locus.ID"]
fl.sig<-fl[fl$Fisher.s.P<cutoff,"Locus.ID"]
length(tx.sig[(tx.sig %in% c(la.sig,al.sig,fl.sig))])
length(la.sig[(la.sig %in% c(tx.sig,al.sig,fl.sig))])
length(al.sig[(al.sig %in% c(la.sig,tx.sig,fl.sig))])
all.shared<-fl.sig[fl.sig %in% la.sig & fl.sig %in% al.sig & fl.sig %in% tx.sig]
fw.shared.chr<-fwsw.tx[fwsw.tx$Locus.ID %in% all.shared,c("Locus.ID","Chr","BP","Column","Overall.Pi")]
tapply(fw.shared.chr$Locus.ID,factor(fw.shared.chr$Chr),function(x){ length(unique(x)) })
#are they using the same SNPs or different SNPs?
stacks.sig<-data.frame(nrow=nrow(fw.shared.chr),ncol=4)
for(i in 1:nrow(fw.shared.chr)){
tx.bp<-fwsw.tx[tx$Fisher.s.P<cutoff & tx$Locus.ID == fw.shared.chr[i,"Locus.ID"],"BP"]
la.bp<-fwsw.la[la$Fisher.s.P<cutoff & la$Locus.ID == fw.shared.chr[i,"Locus.ID"],"BP"]
al.bp<-fwsw.al[al$Fisher.s.P<cutoff & al$Locus.ID == fw.shared.chr[i,"Locus.ID"],"BP"]
fl.bp<-fwsw.fl[fl$Fisher.s.P<cutoff & fl$Locus.ID == fw.shared.chr[i,"Locus.ID"],"BP"]
stacks.sig[i,1]<-paste(tx.bp,sep=",",collapse = ",")
stacks.sig[i,2]<-paste(la.bp,sep=",",collapse = ",")
stacks.sig[i,3]<-paste(al.bp,sep=",",collapse = ",")
stacks.sig[i,4]<-paste(fl.bp,sep=",",collapse = ",")
}
colnames(stacks.sig)<-c("TX","LA","AL","FL")
stacks.sig<-data.frame(cbind(fw.shared.chr,stacks.sig))
stacks.sig$SNP<-paste(stacks.sig$Chr,stacks.sig$BP,sep=".")
return(stacks.sig)
}
#' extract the significant regions from the gff file
sig.region.ann<-function(fw.shared.chr,gff)
{
fw.sig.reg<-do.call(rbind,apply(fw.shared.chr,1,function(sig){
this.gff<-gff[as.character(gff$seqname) %in% as.character(unlist(sig["Chr"])),]
this.reg<-this.gff[this.gff$start <= as.numeric(sig["BP"]) & this.gff$end >= as.numeric(sig["BP"]),]
if(nrow(this.reg) == 0){
if(as.numeric(sig["BP"])>max(as.numeric(this.gff$end))){
new<-data.frame(Locus=sig["Locus.ID"],Chr=sig["Chr"],BP=sig["BP"],SNPCol=sig["Column"],
region="beyond.last.contig", description=NA,SSCID=NA)
}else{
new<-data.frame(Locus=sig["Locus.ID"],Chr=sig["Chr"],BP=sig["BP"],SNPCol=sig["Column"],
region=NA,description=NA,SSCID=NA)
}
}else{
if(length(grep("SSCG\\d+",this.reg$attribute))>0){
geneID<-unique(gsub(".*(SSCG\\d+).*","\\1",this.reg$attribute[grep("SSCG\\d+",this.reg$attribute)]))
gene<-genome.blast[genome.blast$sscv4_gene_ID %in% geneID,"blastp_hit_description"]
}else{
geneID<-NA
gene<-NA
}
new<-data.frame(Locus=sig["Locus.ID"],Chr=sig["Chr"],BP=sig["BP"],SNPCol=sig["Column"],
region=paste(this.reg$feature,sep=",",collapse = ","),description=gene,SSCID=geneID)
}
return(as.data.frame(new))
}))
}
#' extract the significant regions from the gff file
put.reg<-do.call(rbind,apply(put.genes,1,function(gene){
ssc.gene<-as.character(gene[[6]])
#there might be multiple matches
ssc.genes<-unlist(strsplit(ssc.gene,","))
#for each gene, match to gff
gene.info<-do.call(rbind,lapply(ssc.genes,function(genename){
this.gff<-gff[grep(genename,gff$attribute),]
chrom<-unique(as.character(this.gff$seqname))
start<-min(this.gff$start)
stop<-max(this.gff$end)
return(c(chrom,start,stop))
}))
if(as.character(gene[[3]]) == "") { gene[[3]]<-NA }
new<-data.frame(Gene=rep(gene[[1]],length(gene.info[,1])),
Function=rep(gene[[2]],length(gene.info[,1])),
Chromsome=rep(gene[[3]],length(gene.info[,1])),
Species=rep(gene[[4]],length(gene.info[,1])),
Citation=rep(gene[[5]],length(gene.info[,1])),
Scovelli_geneID=rep(gene[[6]],length(gene.info[,1])),
Notes=rep(gene[[7]],length(gene.info[,1])),
Chrom=gene.info[,1],StartBP=gene.info[,2],StopBP=gene.info[,3],
stringsAsFactors = FALSE)
return(as.data.frame(new))
}))
write.table(put.reg,"putative.gene.regions.tsv",col.names=T,sep='\t')
Note that some genes will have multiple rows as they match multiple locations in the genome.
put.reg$rad.loci<-apply(put.reg,1,function(gene){
rad<-vcf[vcf$`#CHROM` %in% gene[["Chrom"]] &
vcf$POS >= as.numeric(as.character(gene[["StartBP"]])) &
vcf$POS <= as.numeric(as.character(gene[["StopBP"]])),]
if(nrow(rad)>0){ rad.snps<-paste(rad$SNP,sep=",",collapse=",") }
else { rad.snps<-NA }
return(rad.snps)
})
About 48.1818182% of the putative genes have RAD loci within them - but only about 22.037037% of all of the annotations have RAD loci.
\(D'\) was calculated using a custom C++ program (found at https://github.com/spflanagan/SCA/tree/master/programs/pairwise_ld_vcf) using the formula:
\[D' = \sum_{i=1}^m\sum_{j=1}^n \frac{p_iq_j|D_{ij}|}{D_{max}},\] where \(m\) is the number of alleles at locus and \(n\) is the number of alleles at locus B. \(D_{ij}\) was calculated as \(f_{ij}-p_iq_j\), where \(f_{ij}\) is the frequency of the \(A_iB_j\) haplotype, \(p_i\) is the frequency of allele \(i\), and \(q_j\) is the frequency of allele \(j\). \(D_{max}\) is the lesser of \(p_iq_j\) or \((1-p_i)(1-q_j)\) when \(D_{ij} \lt 0\) and is the minimum of \((1-p_i)q_i\) or \(p_i(1-q_i)\) when \(D_{ij} \gt 0\).
In the data.frame ld, the locus IDs (LocIDA and LocIDB) are the positions on the chromosome.
ld$pos.diff<-abs(ld$LocIDA - ld$LocIDB)
chrom.ld<-tapply(ld[ld$D.>0.5,"pos.diff"],ld[ld$D. >0.5,"Chrom"],mean)
#' Find outliers in a putative gene region
outlier.in.region<-function(outlier.df, region.df, oChrom="Chrom", oBP="BP",
rChrom="Chrom",rBPStart="StartBP",rBPStop="StopBP"){
out.in<-apply(region.df,1,function(put.gene){
oin<-outlier.df[outlier.df[[oChrom]] %in% put.gene[[rChrom]]
& outlier.df[[oBP]] >= as.numeric(as.character(put.gene[[rBPStart]]))
& outlier.df[[oBP]] <= as.numeric(as.character(put.gene[[rBPStop]])),"SNP"]
if(length(oin)==0){ oin<-NA }
return(paste(oin,sep=",",collapse=","))
})
return(out.in)
}
outlier.nearby<-function(outlier.df,region.df,ld.df,
oChrom="Chrom", oBP="BP",
rChrom="Chrom",rBPStart="StartBP",rBPStop="StopBP"){
out.nearby<-apply(region.df,1,function(put.gene){
nearby<-outlier.df[outlier.df[[oChrom]] %in% put.gene[[rChrom]]
& outlier.df[[oBP]] >= (as.numeric(as.character(put.gene[[rBPStart]])) - (ld.df[put.gene[[rChrom]] ]))
& outlier.df[[oBP]] <= (as.numeric(as.character(put.gene[[rBPStop]])) + (ld.df[put.gene[[rChrom]] ])),"SNP"]
if(length(nearby)==0){ nearby<-NA }
return(paste(nearby,sep=",",collapse=","))
})
}
#' Get the plotting positions for the putative genes
all.chr<-data.frame(Chr=vcf$`#CHROM`,BP=vcf$POS,stringsAsFactors = F)
bounds<-data.frame(levels(as.factor(all.chr$Chr)),tapply(as.numeric(as.character(all.chr$BP)),all.chr$Chr,max))
colnames(bounds)<-c("Chrom","End")
plot.scaffs<-scaffs[scaffs %in% bounds$Chrom]
#convert put.reg to the plotting BP
new.dat<-data.frame(stringsAsFactors = F)
last.max<-0
for(i in 1:length(plot.scaffs)){
#pull out the data for this scaffold
if(nrow(bounds[bounds$Chrom %in% plot.scaffs[i],])>0){ #sanity check
chrom.dat<-put.reg[put.reg$Chrom %in% plot.scaffs[i],]
if(nrow(chrom.dat)>0){
chrom.dat$plot.min<-as.numeric(as.character(chrom.dat$StartBP))+last.max
chrom.dat$plot.max<-as.numeric(as.character(chrom.dat$StopBP))+last.max
new.dat<-rbind(new.dat,chrom.dat)
#last.max<-max(chrom.dat$plot.pos)+
# as.numeric(scaffold.widths[scaffold.widths[,1] %in% scaffs.to.plot[i],2])
}
last.max<-last.max+
as.numeric(bounds[bounds$Chrom %in% plot.scaffs[i],2])
}else{
print(paste(plot.scaffs[i], "has no designated width."))
}
}
#make sure everything is the correct class
new.dat$plot.min<-as.numeric(as.character(new.dat$plot.min))
new.dat$plot.max<-as.numeric(as.character(new.dat$plot.max))
Outlined points are Stacks shared outliers. Black bars are putative genes
Are outliers in putative gene regions?
all.put.ssc<-as.character(put.reg$Scovelli_geneID)
all.put.ssc<-unlist(lapply(all.put.ssc,strsplit,","))
sig.put.match<-all.put.ssc[all.put.ssc %in% fw.sig.reg$SSCID]
print(dim(sig.put.match))
## NULL
#or use the functions
fw.sig.reg$SNP<-paste(fw.sig.reg$Chr,as.character(fw.sig.reg$BP),sep=".")
stacks.in<-outlier.in.region(fw.sig.reg,put.reg,oChrom="Chr")
Of the 48 shared outlier SNPs, 26 are in coding regions of the genome. However, 2 shared outliers are in putative gene regions.
No, they don’t overlap. Maybe they’re in the same LD region?
put.nearby.rad<-outlier.nearby(fw.sig.reg,put.reg,chrom.ld,oChrom = "Chr")
#add the nearby significant rad loci to the put.reg data.frame
put.reg$nearby.rad<-put.nearby.rad
head(put.reg[put.reg$nearby.rad!="NA",])
## Gene
## 1 ABCB7
## 2 ABLIM3
## 3 ADAM19
## 4 ADAM19
## 6 ADRB2
## 7 ADRB2
## Function
## 1 iron metabolism
## 2 actin-binding
## 3 osteoblast differentiation
## 4 osteoblast differentiation
## 6 bone density and mineralization
## 7 bone density and mineralization, tooth organogenesis, craniofacial development
## Chromsome Species Citation
## 1 <NA> Gasterosteus aculeatus Jones et al 2012b
## 2 <NA> Gasterosteus aculeatus Hohenlohe et al 2010
## 3 IV Gasterosteus aculeatus Hohenlohe et al 2010
## 4 IV Gasterosteus aculeatus Hohenlohe et al 2010
## 6 VII Gasterosteus aculeatus Hohenlohe et al 2010
## 7 IV Gasterosteus aculeatus Hohenlohe et al 2010
## Scovelli_geneID Notes Chrom StartBP StopBP rad.loci
## 1 SSCG00000000949 <NA> LG14 5689975 5711169 <NA>
## 2 SSCG00000004181 <NA> LG14 12122996 12144915 <NA>
## 3 SSCG00000004162,SSCG00000007962 <NA> LG14 11860301 11877333 <NA>
## 4 SSCG00000004162,SSCG00000007962 <NA> LG14 22306738 22325075 <NA>
## 6 SSCG00000004169 <NA> LG14 12061583 12065881 <NA>
## 7 SSCG00000004169 <NA> LG14 12061583 12065881 <NA>
## nearby.rad
## 1 LG14.3289190,LG14.3352883,LG14.3960463,LG14.5029044
## 2 LG14.5029044,LG14.13689654,LG14.18295431
## 3 LG14.5029044,LG14.13689654,LG14.18295431
## 4 LG14.18295431
## 6 LG14.5029044,LG14.13689654,LG14.18295431
## 7 LG14.5029044,LG14.13689654,LG14.18295431
So of the 540 gene annotations 229 are within the mean LD range of an outlier, which represent 67 putative freshwater genes. Those genes are: ABCB7, ABLIM3, ADAM19, ADRB2, AFAP1L1, angiotensin II receptor, ANXA6, APOL, AQP3, ARHGEF3, ATP6V0A1, ATP6V1A, AVPR2, BGN, CA, CAII, CAMKK1, CD63, CEBPD, CFTR, cortisol, ECaC, EDA, EFNB1, FBLN1, FERH1, FZD2, HRH2, IGFBP5, IGFBP6, IGF-I, IL12B, KCNH4, KITLG, LEPR, LTB4R, MAPK11, MAPK12, MSX2, mucin, NAKATPase, NBC1, NCX, NHE, NKCC, NR4A1, PDLIM7, PMCA, PODXL, PRKCD, PRL, RDH10, RHOA, RHOGTP8, rtCR2, SCUBE1, SLC26A3, SLC2A13, SPG1, SYNPO, TBX2, TRIM14, UT, VATPase, WNT5A, WNT7B, ZEB1
Now for \(\delta\) -divergence
#in the gene
sdd.in<-outlier.in.region(sdd.out,put.reg,oBP = "Pos")
put.reg$sdd.in<-sdd.in
The genes ARHGEF3, ATP6V0A1, ATP6V1A, FLT4, OSBPL8, RHOGTP8, SLC2A13, SPG1, STAT3, TIMP3, VATPase contain \(\delta\)-divergence outlier regions.
#in the gene
sdd.nb<-outlier.nearby(sdd.out,put.reg,chrom.ld,oBP = "Pos")
And of the 682 \(\delta\)-divergence outliers, 93.7037037% are in the LD region around putative freshwater genes.
#taken directly from fwsw_analysis.R
bf<-read.delim("bayenv/p4.bf.txt",header=T)
bf$SNP<-paste(bf$scaffold,as.numeric(as.character(bf$BP))+1,sep=".")
bf.co<-apply(bf[,5:7],2,quantile,0.99) #focus on Bayes Factors, because of Lotterhos & Whitlock (2015)
temp.bf.sig<-bf[bf$Temp_BF>bf.co["Temp_BF"],c(1,2,4,8,5,9)]
sal.bf.sig<-bf[bf$Salinity_BF>bf.co["Salinity_BF"],c(1,2,4,8,6,9)]
grass.bf.sig<-bf[bf$seagrass_BF>bf.co["seagrass_BF"],c(1,2,4,8,7,9)]
#get the log transformed Bayes Factors
bf$logSal<-log(bf$Salinity_BF)
bf$logTemp<-log(bf$Temp_BF)
bf$logSeagrass<-log(bf$seagrass_BF)
There are 0 overlapping outliers between temperature-, salinity-, and seagrass-associated loci.
But if we only care about salinity ones, there are 91 outliers.
Are any of the Bayenv salinity outliers near the putative freshwater genes?
bfs.in<-outlier.in.region(sal.bf.sig,put.reg,"scaffold")
bfs.nb<-outlier.nearby(sal.bf.sig,put.reg,chrom.ld,"scaffold")
Of the 9053 RAD loci analyzed by Bayenv2 for associations with temperature, 0.3703704% are in putative freshwater genes and 75.5555556% are within the LD neighborhood.
Are any of the Bayenv temperature outliers near putative freshwater genes?
bft.in<-outlier.in.region(temp.bf.sig,put.reg,"scaffold")
bft.nb<-outlier.nearby(temp.bf.sig,put.reg,chrom.ld,"scaffold")
Of the 9053 RAD loci analyzed by Bayenv2 for associations with temperature, 0.5555556% are in putative freshwater genes and 80.1851852% are within the LD neighborhood.
Are any of the loci associated with seagrass density in or near putative freshwater genes?
bfg.in<-outlier.in.region(grass.bf.sig,put.reg,"scaffold")
bfg.nb<-outlier.nearby(grass.bf.sig,put.reg,chrom.ld,"scaffold")
Of the 9053 RAD loci analyzed by Bayenv2 for associations with temperature, 0.1851852% are in putative freshwater genes and 77.962963% are within the LD neighborhood.
put.reg$bfs.in<-bfs.in
put.reg$bft.in<-bft.in
put.reg$bfg.in<-bfg.in
unique(put.reg[put.reg$bfs.in !="NA","Gene"])
## [1] ARHGEF3 NHE
## 110 Levels: ABCB7 ABLIM3 ADAM19 ADAMTS10 ADRB2 AE ... ZEB1
unique(put.reg[put.reg$bft.in !="NA","Gene"])
## [1] APOL NHE TRIM14
## 110 Levels: ABCB7 ABLIM3 ADAM19 ADAMTS10 ADRB2 AE ... ZEB1
unique(put.reg[put.reg$bfg.in !="NA","Gene"])
## [1] ARHGEF3
## 110 Levels: ABCB7 ABLIM3 ADAM19 ADAMTS10 ADRB2 AE ... ZEB1
All three BayEnv tests identified outliers in the genes . The temperature and grass also share NA, and temperature and salinity share NHE.
Clearly, RHOGTP8 and TNS1 are showing up repeatedly (all Bayenv2 outliers, and \(\delta\)-divergence outliers).
But looking at put.reg[put.reg$Gene =="RHOGTP8","rad.loci"], it’s obvious that Rho GTPase-activating proteins are distributed throughout the genome (on chromosomes LG11, LG15, LG20, LG14, LG1, scaffold_1578, LG12, LG9, LG8, LG22, LG18, LG5, scaffold_1008, LG6, scaffold_950, LG10, LG7, scaffold_139, LG21, LG16, LG19).
Jost’s D measures the fraction of allelic differentiation between populations. According to Molecular Ecologist, “if allelic differentiation at a particular locus is the value of interest, it appears that D is the best measure”, so it might be useful to look at Jost’s D in the putatively freshwater genes.
jost.in<-outlier.in.region(jostd,put.reg,oChrom="Chr",oBP="POS")
jost.nb<-outlier.nearby(jostd,put.reg,chrom.ld,oChrom = "Chr",oBP="POS")
put.reg$jostd.nb<-jost.nb
put.reg$jostd.in<-jost.in
So 119 loci are located within putative freshwater genes, but 517 are within the LD region of the putative genes, representing 95.7407407 of the putative gene annotations and 109 genes
*Genes containing shared Fst outliers: NA, AE, APOL, AQP3, ARHGEF3, ATP6V0A1, ATP6V1A, BMI1, CA, CA4, CAII, CAMKK1, CLINT1, cortisol, EFNB1, FBLN1, FLT4, IGFBP2, IGFBP5, IGFBP6, IL12B, KCNH4, Kir, Kir2.2, KITLG, LEPR, LTB4R, MAPK11, MAPK12, MSX2, mucin, NAKATPase, NCX, NHE, OSBPL8, PDLIM7, PRKCD, PRL, RHOA, RHOGTP8, ROBO1, rtCR1, SCUBE1, SLC2A13, SOD3, SPG1, STAT3, SYNPO, TIMP3, TNS1, TRIM14, TSHB, VATPase, ZEB1
*Genes containing \(\delta\) -divergence outliers: ARHGEF3, ATP6V0A1, ATP6V1A, FLT4, OSBPL8, RHOGTP8, SLC2A13, SPG1, STAT3, TIMP3, VATPase
*Genes containing Salinity-associated outliers: ARHGEF3, NHE
*Genes containing Jost D outliers: AE, APOL, AQP3, ARHGEF3, ATP6V0A1, ATP6V1A, BMI1, CA, CA4, CAII, CAMKK1, CLINT1, cortisol, EFNB1, FBLN1, FLT4, IGFBP2, IGFBP5, IGFBP6, IL12B, KCNH4, Kir, Kir2.2, KITLG, LEPR, LTB4R, MAPK11, MAPK12, MSX2, mucin, NAKATPase, NCX, NHE, OSBPL8, PDLIM7, PRKCD, PRL, RHOA, RHOGTP8, ROBO1, rtCR1, SCUBE1, SLC2A13, SOD3, SPG1, STAT3, SYNPO, TIMP3, TNS1, TRIM14, TSHB, VATPase, ZEB1
*Overlap: ARHGEF3
*Divergence analyses: ARHGEF3, ATP6V0A1, ATP6V1A, FLT4, OSBPL8, RHOGTP8, SLC2A13, SPG1, STAT3, TIMP3, VATPase
I’m highlighting a few of the putative genes that have a bunch of outliers nearby or in them. First is TNS1, which matched three genome annotations but only had one region, on LG1, that contained shared outlier Fsts, delta divergence, Bayenv salinity, were near monophyletic neighbor joining trees.
fst.dfs<-list(fwsw.tx,fwsw.la,fwsw.al,fwsw.fl)
names(fst.dfs)<-c("TX FWSW","LA FWSW","AL FWSW","FL FWSW")
colors=c(grp.colors[1],grp.colors[2],grp.colors[3],grp.colors[6])
alphas=c(0.5,0.5,1,0.5)
ltys=c(1,1,2,1)
The plotting function gene.region.plot accpets a number of parameters.
gene.region.plot<-function(chrom,gene,put.reg,vcf,chrom.ld, fst.dfs,deltad=FALSE,D=FALSE,
colors="black",lwds=2,alphas=0.5,ltys=1,legend=TRUE,bases="bp",
smooth=FALSE, fst.name="Corrected.AMOVA.FST",txt.cex=1,y.lim=c(0,1),...){
pstart<-min(as.numeric(as.character(put.reg[put.reg$Gene ==gene &
put.reg$Chrom==chrom,"StartBP"])))-(chrom.ld[chrom]/2)
pstop<-max(as.numeric(as.character(put.reg[put.reg$Gene ==gene &
put.reg$Chrom==chrom,"StartBP"])))+(chrom.ld[chrom]/2)
if(pstart < 0){ pstart<- 0 }
if(pstop > max(vcf[vcf$`#CHROM`==chrom,"POS"])){ pstop <- max(vcf[vcf$`#CHROM`==chrom,"POS"]) }
starts<-as.numeric(put.reg[put.reg$Chrom %in% chrom & put.reg$Gene == gene,"StartBP"])
stops<-as.numeric(put.reg[put.reg$Chrom %in% chrom & put.reg$Gene == gene,"StopBP"])
if(smooth==TRUE){
#generate the dataframes
smooth.fsts<-lapply(fst.dfs,function(df){
this.df<-df[df$Chr %in% chrom,]
this.smooth<-do.call("rbind",lapply(seq(1,nrow(this.df),(nrow(this.df)*0.15)/5),sliding.avg,
dat=data.frame(Pos=this.df$BP,Fst=this.df[,fst.name]),
width=nrow(this.df)*0.15))
return(this.smooth)
})
} else{
smooth.fsts<-lapply(fst.dfs,function(df){
this.smooth<-df[df$Chr %in% chrom,c("BP",fst.name)]
})
}
if(is.data.frame(deltad)){
this.dd<-deltad[deltad$Chrom %in% chrom,]
dd.smooth<-loess.smooth(this.dd$Pos,this.dd$deltad,span=0.1,degree=2)
}
if(is.data.frame(D)){
this.d<-D[D$Chr %in% chrom,]
dsmooth<-loess.smooth(this.d$POS,this.d$D,span=0.1,degree=2)
}
pr.gene<-put.reg[put.reg$Chrom==chrom & put.reg$Gene==gene,]
fst.gene<- unlist(strsplit(gsub(paste(chrom,".",sep=""),"",pr.gene$rad.loci[pr.gene$rad.loci != "NA"]),","))
sal.gene<- unlist(strsplit(gsub(paste(chrom,".",sep=""),"",pr.gene$bfs.in[pr.gene$bfs.in != "NA"]),","))
# tmp.gene<- unlist(strsplit(gsub(paste(chrom,".",sep=""),"",pr.gene$bft.in[pr.gene$bft.in != "NA"]),","))
#grs.gene<- unlist(strsplit(gsub(paste(chrom,".",sep=""),"",pr.gene$bfg.in[pr.gene$bfg.in != "NA"]),","))
#jd.gene<-unlist(strsplit(gsub(paste(chrom,".",sep=""),"",pr.gene$jostd.nb[jostd.nb != "NA"]),","))
sdd.gene<- unlist(strsplit(gsub(paste(chrom,".",sep=""),"",pr.gene$sdd.in[pr.gene$sdd.in != "NA"]),","))
#nj.gene<-as.numeric(unlist(strsplit(gsub(paste(chrom,".",sep=""),"",pr.gene$nj.nb[pr.gene$nj.nb != "NA"]),",")))
nj.gene<-NULL
nb.gene<-as.numeric(unlist(strsplit(gsub(paste(chrom,".",sep=""),"",pr.gene$nearby.rad[pr.gene$nearby.rad != "NA"]),",")))
gene.out<-data.frame(BP=c(as.numeric(fst.gene),as.numeric(sal.gene),as.numeric(sdd.gene)),
shape=c(rep(8,length(fst.gene)),rep(1,length(sal.gene)),rep(4,length(sdd.gene))))
if(length(colors) != length(smooth.fsts)){
colors<-rep(colors,length(smooth.fsts)/length(colors))
}
if(length(lwds) != length(smooth.fsts)){
lwds<-rep(lwds,length(smooth.fsts)/length(lwds))
}
if(length(alphas) != length(smooth.fsts)){
alphas<-rep(alphas,length(smooth.fsts)/length(alphas))
}
if(length(ltys) != length(smooth.fsts)){
ltys<-rep(ltys,length(smooth.fsts)/length(ltys))
}
plot(smooth.fsts[[1]],type='n',ylim=c(y.lim[1],y.lim[2]+0.02),
bty="L",xlab="",ylab="",xaxt='n',yaxt='n',xlim=c(pstart,pstop))
axis(2,las=1,cex.axis=txt.cex)
xlabs<-c(pstart,pstop)
if(bases %in% c("mb","MB","Mb")) xlabs<-c(round((pstart/1000000),2),round((pstop/1000000),2))
if(bases %in% c("kb","KB","Kb")) xlabs<-c(round((pstart/1000),2),round((pstop/1000),2))
axis(1,at=c(pstart,pstop),cex.axis=txt.cex,labels = xlabs)
#add putative gene
rect(xleft=as.numeric(starts),xright=as.numeric(stops),
ybottom=y.lim[1]-0.04,ytop=y.lim[2],col="indianred",border="indianred")
#add fsts
mtext(chrom,1,cex=0.75*txt.cex,line = 1)
for(i in 1:length(smooth.fsts)){
points(smooth.fsts[[i]],col=alpha(colors[i],alphas[i]),type="l",lwd=lwds[i],lty=ltys[i])
}
#add delta divergence
if(is.data.frame(deltad)){
points(dd.smooth$x,dd.smooth$y,col="#dfc27d",type="l",lwd=2)
}
#add Jost's D
if(is.data.frame(D)){
points(dsmooth$x,dsmooth$y,type="l",col="#a6611a",lwd=2)
}
#are nj trees shown in scope of fig?
njs.eval<-lapply(nj.gene,function(x){
if(x >= pstart & x <= pstop) { eval = TRUE }
else { eval = FALSE }
return(eval)
})
#add delta d
if(is.data.frame(deltad)){
lgnd<-unlist(lapply(names(fst.dfs),function(n) { bquote(.(n)~italic(F[ST])) }))
lgnd<-c(lgnd,expression(delta~-divergence))
}else{
lgnd<-unlist(lapply(names(fst.dfs),function(n) { substitute(paste(name,italic(F[ST]),sep=""),list(name=n)) }))
}
cols<-NULL
pchs<-rep(32,length(lwds))
for(i in 1:length(colors)){
cols<-c(cols,alpha(colors[i],alphas[i]))
}
if(is.data.frame(deltad)){
cols<-c(cols,"#dfc27d")
pchs<-c(pchs,32)
ltys<-c(ltys,1)
lwds<-c(lwds,2)
}
if(is.data.frame(D)){
cols<-c(cols,"#a6611a")
pchs<-c(pchs,32)
lgnd<-c(lgnd,"Jost's D")
ltys<-c(ltys,1)
lwds<-c(lwds,2)
}
lgnd<-c(lgnd,substitute(italic(g),list(g=gene)),"Outlier RAD loci")
cols<-c(cols,"indianred","black")
pchs<-c(pchs,15,124)
if(length(grep(TRUE,njs.eval))>0){
lgnd<-c(lgnd,"Monophyletic Gene Tree")
cols<-c(cols,"#08519c")
pchs<-c(pchs,124)
}
if(length(sal.gene)>0){
lgnd<-c(lgnd,"Salinity-associated")
cols<-c(cols,"black")
pchs<-c(pchs,25)
}
if(legend==TRUE){
legend("topleft",
legend=lgnd,pch=pchs,
bty='n',lwd=c(lwds,0,0,0,0),lty=c(ltys,0,0,0,0),
col=cols,cex = txt.cex)
}
#add outlier loci
clip(x1=min(as.numeric(pstart)),x2=max(as.numeric(pstop)),y1=y.lim[2]+.01,y2=y.lim[2]+0.2)
abline(v=fst.gene,col="black")
points(x=sal.gene,y=rep(y.lim[2]+0.05,length(sal.gene)),col="black",pch=25,cex=0.5*txt.cex)
abline(v=nb.gene,col=alpha("black",0.5),cex=2)
abline(v=nj.gene,col="#08519c",cex=2)
}
par(oma=c(2,2,2,2),mar=c(2,2,2,2))
tns1<-lapply(unique(put.reg[put.reg$Gene=="TNS1" & !is.na(put.reg$rad.loci),"Chrom"]),
gene.region.plot,gene="TNS1",put.reg=put.reg,vcf=vcf,chrom.ld=chrom.ld,
fst.dfs=fst.dfs,deltad=deltad,D=jostd,colors=colors,alphas=alphas,ltys=ltys,
smooth=FALSE,fst.name="Smoothed.AMOVA.Fst",txt.cex=1.3)
par(mfrow=c(2,2),oma=c(2,2,2,2),mar=c(2,2,2,2))
arhgef3<-lapply(unique(put.reg[put.reg$Gene=="ARHGEF3" & !is.na(put.reg$rad.loci),"Chrom"]),
gene.region.plot,gene="ARHGEF3",put.reg=put.reg,vcf=vcf,chrom.ld=chrom.ld,
fst.dfs=fst.dfs,deltad=deltad,D=jostd,colors=colors,alphas=alphas,ltys=ltys,
smooth=FALSE,fst.name="Smoothed.AMOVA.Fst")
par(mfrow=c(2,4),oma=c(2,2,2,2),mar=c(2,2,2,2))
ca<-lapply(unique(put.reg[put.reg$Gene=="CA" & !is.na(put.reg$rad.loci),"Chrom"]),
gene.region.plot,gene="CA",put.reg=put.reg,vcf=vcf,chrom.ld=chrom.ld,
fst.dfs=fst.dfs,deltad=deltad,colors=colors,alphas=alphas,ltys=ltys,
smooth=FALSE,fst.name="Smoothed.AMOVA.Fst")
ca2<-lapply(unique(put.reg[put.reg$Gene=="CAII" & !is.na(put.reg$rad.loci),"Chrom"]),
gene.region.plot,gene="CAII",put.reg=put.reg,vcf=vcf,chrom.ld=chrom.ld,
fst.dfs=fst.dfs,deltad=deltad,colors=colors,alphas=alphas,ltys=ltys,
smooth=FALSE,fst.name="Smoothed.AMOVA.Fst")
par(mfrow=c(2,4),oma=c(2,2,2,2),mar=c(2,2,2,2))
rhogtp8<-lapply(unique(put.reg[put.reg$Gene=="RHOGTP8" & !is.na(put.reg$rad.loci),"Chrom"]),
gene.region.plot,gene="RHOGTP8",put.reg=put.reg,vcf=vcf,chrom.ld=chrom.ld,
fst.dfs=fst.dfs,deltad=deltad,D=jostd,colors=colors,alphas=alphas,ltys=ltys,
smooth=FALSE,fst.name="Smoothed.AMOVA.Fst")
par(oma=c(2,2,2,2),mar=c(2,2,2,2))
slc26a3<-lapply(unique(put.reg[put.reg$Gene=="SLC26A3" & !is.na(put.reg$rad.loci),"Chrom"]),
gene.region.plot,gene="SLC26A3",put.reg=put.reg,vcf=vcf,chrom.ld=chrom.ld,
fst.dfs=fst.dfs,deltad=deltad,D=jostd,colors=colors,alphas=alphas,ltys=ltys,
smooth=FALSE,fst.name="Smoothed.AMOVA.Fst") #LG20 doesn't plot
par(mfrow=c(2,3),oma=c(2,2,2,2),mar=c(2,2,2,2))
trim14<-lapply(unique(put.reg[put.reg$Gene=="TRIM14" & !is.na(put.reg$rad.loci),"Chrom"]),
gene.region.plot,gene="TRIM14",put.reg=put.reg,vcf=vcf,chrom.ld=chrom.ld,
fst.dfs=fst.dfs,deltad=deltad,D=jostd,colors=colors,alphas=alphas,ltys=ltys,
smooth=FALSE,fst.name="Smoothed.AMOVA.Fst")
par(oma=c(2,2,2,2),mar=c(2,2,2,2))
pmca<-lapply(unique(put.reg[put.reg$Gene=="PMCA" & !is.na(put.reg$rad.loci),"Chrom"]),
gene.region.plot,gene="PMCA",put.reg=put.reg,vcf=vcf,chrom.ld=chrom.ld,
fst.dfs=fst.dfs,deltad=deltad,D=jostd,colors=colors,alphas=alphas,ltys=ltys,
smooth=FALSE,fst.name="Smoothed.AMOVA.Fst")
par(mfrow=c(1,3),oma=c(2,2,2,2),mar=c(2,2,2,2))
vatpase<-lapply(unique(put.reg[put.reg$Gene=="VATPase" & !is.na(put.reg$rad.loci),"Chrom"]),
gene.region.plot,gene="VATPase",put.reg=put.reg,vcf=vcf,chrom.ld=chrom.ld,
fst.dfs=fst.dfs,deltad=deltad,D=jostd,colors=colors,alphas=alphas,ltys=ltys,
smooth=FALSE,fst.name="Smoothed.AMOVA.Fst",txt.cex=1.3)
Based on the plots above, I’ll exclude cases where the candidate gene is beyond the scope of the FST data.
colors=c(grp.colors[1],grp.colors[2],grp.colors[3],grp.colors[6])
alphas=c(0.5,0.5,1,0.5)
ltys=c(1,1,2,1)
png("Fig7candidateGenes.png",height=10,width=10,units="in",res=300)
par(mfrow=c(4,4),oma=c(2,2,4.5,2),mar=c(1,3,2,1))
#row1
gene.region.plot("LG1","TNS1",put.reg,vcf,chrom.ld, fst.dfs,deltad=deltad,txt.cex=2,
colors=colors,alphas=alphas,ltys=ltys,legend=FALSE,fst.name="Smoothed.AMOVA.Fst",
bases = "MB",y.lim = c(0,0.5))
text(x=18000000,y=0.4,expression(italic(TNS1)),xpd=T,cex=2)
gene.region.plot("LG1","PMCA",put.reg,vcf,chrom.ld, fst.dfs,deltad=deltad,txt.cex=2,
colors=colors,alphas=alphas,ltys=ltys,legend=FALSE,fst.name="Smoothed.AMOVA.Fst",
bases = "MB",y.lim = c(0,0.5))
text(x=12000000,y=0.4,expression(italic(PMCA)),xpd=T,cex=2)
gene.region.plot("LG1","CAII",put.reg,vcf,chrom.ld, fst.dfs,deltad=deltad,txt.cex=2,
colors=colors,alphas=alphas,ltys=ltys,legend=FALSE,fst.name="Smoothed.AMOVA.Fst",
bases = "MB",y.lim = c(0,0.5))
text(x=4500000,y=0.4,expression(italic(CAII)),xpd=T,cex=2)
gene.region.plot("LG2","CAII",put.reg,vcf,chrom.ld, fst.dfs,deltad=deltad,txt.cex=2,
colors=colors,alphas=alphas,ltys=ltys,legend=FALSE,fst.name="Smoothed.AMOVA.Fst",
bases = "MB",y.lim = c(0,0.5))
text(x=7500000,y=0.4,expression(italic(CAII)),xpd=T,cex=2)
#row2
gene.region.plot("LG10","CAII",put.reg,vcf,chrom.ld, fst.dfs,deltad=deltad,txt.cex=2,
colors=colors,alphas=alphas,ltys=ltys,legend=FALSE,fst.name="Smoothed.AMOVA.Fst",
bases = "MB",y.lim = c(0,0.5))
text(x=3000000,y=0.4,expression(italic(CAII)),xpd=T,cex=2)
gene.region.plot("LG21","CAII",put.reg,vcf,chrom.ld, fst.dfs,deltad=deltad,txt.cex=2,
colors=colors,alphas=alphas,ltys=ltys,legend=FALSE,fst.name="Smoothed.AMOVA.Fst",
bases = "MB",y.lim = c(0,0.5))
text(x=450000,y=0.4,expression(italic(CAII)),xpd=T,cex=2)
gene.region.plot("LG2","TRIM14",put.reg,vcf,chrom.ld, fst.dfs,deltad=deltad,txt.cex=2,
colors=colors,alphas=alphas,ltys=ltys,legend=FALSE,fst.name="Smoothed.AMOVA.Fst",
bases = "MB",y.lim = c(0,0.5))
text(x=4000000,y=0.4,expression(italic(TRIM14)),xpd=T,cex=2)
gene.region.plot("LG5","TRIM14",put.reg,vcf,chrom.ld, fst.dfs,deltad=deltad,txt.cex=2,
colors=colors,alphas=alphas,ltys=ltys,legend=FALSE,fst.name="Smoothed.AMOVA.Fst",
bases = "MB",y.lim = c(0,0.5))
text(x=7000000,y=0.4,expression(italic(TRIM14)),xpd=T,cex=2)
#row3
gene.region.plot("LG10","TRIM14",put.reg,vcf,chrom.ld, fst.dfs,deltad=deltad,txt.cex=2,
colors=colors,alphas=alphas,ltys=ltys,legend=FALSE,fst.name="Smoothed.AMOVA.Fst",
bases = "MB",y.lim = c(0,0.5))
text(x=6000000,y=0.4,expression(italic(TRIM14)),xpd=T,cex=2)
gene.region.plot("LG11","TRIM14",put.reg,vcf,chrom.ld, fst.dfs,deltad=deltad,txt.cex=2,
colors=colors,alphas=alphas,ltys=ltys,legend=FALSE,fst.name="Smoothed.AMOVA.Fst",
bases = "MB",y.lim = c(0,0.5))
text(x=2500000,y=0.4,expression(italic(TRIM14)),xpd=T,cex=2)
gene.region.plot("LG13","TRIM14",put.reg,vcf,chrom.ld, fst.dfs,deltad=deltad,txt.cex=2,
colors=colors,alphas=alphas,ltys=ltys,legend=FALSE,fst.name="Smoothed.AMOVA.Fst",
bases = "MB",y.lim = c(0,0.5))
text(x=1500000,y=0.4,expression(italic(TRIM14)),xpd=T,cex=2)
gene.region.plot("LG1","VATPase",put.reg,vcf,chrom.ld, fst.dfs,deltad=deltad,txt.cex=2,
colors=colors,alphas=alphas,ltys=ltys,legend=FALSE,fst.name="Smoothed.AMOVA.Fst",
bases = "MB",y.lim = c(0,0.5))
text(x=6500000,y=0.4,expression(italic(VATPase)),xpd=T,cex=2)
#row4
gene.region.plot("LG6","VATPase",put.reg,vcf,chrom.ld, fst.dfs,deltad=deltad,txt.cex=2,
colors=colors,alphas=alphas,ltys=ltys,legend=FALSE,fst.name="Smoothed.AMOVA.Fst",
bases = "MB",y.lim = c(0,0.5))
text(x=4500000,y=0.4,expression(italic(VATPase)),xpd=T,cex=2)
gene.region.plot("LG8","VATPase",put.reg,vcf,chrom.ld, fst.dfs,deltad=deltad,txt.cex=2,
colors=colors,alphas=alphas,ltys=ltys,legend=FALSE,fst.name="Smoothed.AMOVA.Fst",
bases = "MB",y.lim = c(0,0.6))
text(x=4500000,y=0.5,expression(italic(VATPase)),xpd=T,cex=2)
par(fig = c(0, 1, 0, 1),oma=rep(0, 4), mar=rep(0, 4), new=TRUE)
plot(0:1.5, 0:1.5, type="n", xlab="", ylab="", axes=FALSE)
legend(x=0.5,y=0.22,c(expression(Shared~italic(F)[ST]~Outliers),
"Salinity-associated SNPs",
"FW Candidate Gene"),
pch=c(124,25,124),lty=c(0,0,0),lwd=2,
col=c("black","black","indianred"),
bty='n',ncol=1,cex=2,x.intersp=-0.5,xpd=T)
#add lines to the top
par(fig = c(0, 1, 0, 1),oma=rep(0, 4), mar=rep(0, 4), new=TRUE)
plot(0:1, 0:1, type="n", xlab="", ylab="", axes=FALSE)
legend("top",c(expression(TX~FWSW~italic(F)[ST]),
expression(LA~FWSW~italic(F)[ST]),
expression(AL~FWSW~italic(F)[ST]),
expression(FL~FWSW~italic(F)[ST]),
expression(delta~-divergence)),
pch=rep(32,5),lty=c(1,1,1,1,1),lwd=2,text.width=0.16,
col=c(grp.colors[1],grp.colors[2],grp.colors[3],grp.colors[6],
"#dfc27d"),
bty='n',ncol=3,cex=2,x.intersp=0.5,y.intersp=0.75,xpd=T)
dev.off()
## png
## 2
Fig. 7
#define a few things
chrom<-"LG4"
pchs<-c(rep(32,6),15,124,124)
cols<-c(grp.colors[1],grp.colors[2],grp.colors[3],grp.colors[6],
"#dfc27d","#a6611a","indianred","black","#08519c")
ltys=c(1,1,2,1,1,1,0,0,0)
lwds<-c(rep(2,6),0,0,0)
#put together the data for LG4
this.dd<-deltad[deltad$Chrom %in% chrom,]
dd.smooth<-loess.smooth(this.dd$Pos,this.dd$deltad,span=0.1,degree=2)
#add Jost's D
this.d<-jostd[jostd$Chr %in% chrom,]
dsmooth<-loess.smooth(this.d$POS,this.d$D,span=0.1,degree=2)
#get the outliers
pr.gene<-put.reg[put.reg$Chrom==chrom ,]
#rad loci
fst.gene<- stacks.sig[stacks.sig$Chr==chrom,"BP"]
#none of the salinity-associated RAD loci are in or near genes on LG8, but we could plot them all anyway
sal.gene<-sal.bf.sig[sal.bf.sig$scaffold=="LG8","BP"]
#jost's d - none are in the putative genes
jd.gene<-unlist(strsplit(gsub(paste(chrom,".",sep=""),"",pr.gene$jost.in[jost.in != "NA"]),","))
#delta divergence outliers- none are in the putative genes
sdd.gene<- unlist(strsplit(gsub(paste(chrom,".",sep=""),"",pr.gene$sdd.in[pr.gene$sdd.in != "NA"]),","))
#put all of them together with their associated shapes
gene.out<-data.frame(BP=c(as.numeric(fst.gene),as.numeric(sal.gene),as.numeric(sdd.gene)),
shape=c(rep(8,length(fst.gene)),rep(1,length(sal.gene)),rep(4,length(sdd.gene))))
png("LG4.png",width=10,height=7,units="in",res=300)
nf <- layout(matrix(c(1,1,1,1,
2,2,2,2,
3,3,3,4), nrow=3, byrow=TRUE))
par(oma=c(1.5,2.5,1,1),mar=c(1.5,1,2,1))
plot(fst.dfs[[1]]$BP[fst.dfs[[1]]$Chr %in% chrom],
fst.dfs[[1]]$Smoothed.AMOVA.Fst[fst.dfs[[1]]$Chr %in% chrom],
type='n',ylim=c(0,1),bty="L",xlab="",ylab="",xaxt='n',yaxt='n')
axis(2,las=1,cex.axis=2)
axis(1,cex.axis=2,at=c(0,5000000,10000000,15000000),labels = c(0,5,10,15))
#add putative gene
g<-put.reg[put.reg$Chrom %in% chrom,]
g$Gene<-as.character(g$Gene)
g$Gene[grep("ATP6",g$Gene)]<-"ATP6V" #standardize the names
g<-g[,c("Gene","StartBP","StopBP")]
g<-g[!duplicated(g$StartBP),]
g<-g[order(g$StartBP),]
starts<-as.numeric(g[,"StartBP"])
stops<-as.numeric(g[,"StopBP"])
rect(xleft=as.numeric(starts),xright=as.numeric(stops),
ybottom=-0.04,ytop=1,col="indianred",border="indianred")
#ad the ones that are spaced out
text(x=g$StartBP[!g$Gene %in% c("SCUBE1","TRIM14")],y=0.5,
labels=g$Gene[!g$Gene %in% c("SCUBE1","TRIM14")],font=2,srt=90,xpd=T,cex=1.75)
text(x=g$StartBP[g$Gene == "SCUBE1"]-50000,y=0.8,
labels=g$Gene[g$Gene =="SCUBE1"],font=2,srt=90,xpd=T,cex=1.75)
text(x=g$StartBP[g$Gene == "TRIM14"]+50000,y=0.2,
labels=g$Gene[g$Gene =="TRIM14"],font=2,srt=90,xpd=T,cex=1.75)
#add fsts
mtext(chrom,1,cex=2*0.75,line=2.5)
for(i in 1:length(fst.dfs)){
points(fst.dfs[[i]]$BP[fst.dfs[[i]]$Chr %in% chrom],
fst.dfs[[i]]$Smoothed.AMOVA.Fst[fst.dfs[[i]]$Chr %in% chrom],
col=alpha(colors[i],alphas[i]),type="l",lwd=lwds[i],lty=ltys[i])
}
#add delta-d and Jost's D
points(dd.smooth$x,dd.smooth$y,col="#dfc27d",type="l",lwd=2)
points(dsmooth$x,dsmooth$y,type="l",col="#a6611a",lwd=2)
#add outlier loci
clip(x1=min(as.numeric(fst.dfs[[1]]$BP[fst.dfs[[1]]$Chr %in% chrom])),
x2=max(as.numeric(fst.dfs[[1]]$BP[fst.dfs[[1]]$Chr %in% chrom])),y1=0.99,y2=1.06)
abline(v=fst.gene,col="black",lwd=1.5)
points(x=sal.gene,y=rep(1.05,length(sal.gene)),col="black",pch=25,cex=1)
###Zoom-in plot
plot(fst.dfs[[1]]$BP[fst.dfs[[1]]$Chr %in% chrom],
fst.dfs[[1]]$Smoothed.AMOVA.Fst[fst.dfs[[1]]$Chr %in% chrom],
type='n',ylim=c(0,1),bty="L",xlab="",ylab="",xaxt='n',yaxt='n',xlim=c(5000000,10000000))
axis(2,las=1,cex.axis=2)
axis(1,cex.axis=2,at=c(5000000,10000000),labels=c(5,10))
#add putative gene
g<-put.reg[put.reg$Chrom %in% chrom & put.reg$StartBP<10000000,]
g$Gene<-as.character(g$Gene)
g$Gene[grep("ATP6",g$Gene)]<-"ATP6V" #standardize the names
g<-g[,c("Gene","StartBP","StopBP")]
g<-g[!duplicated(g$StartBP),]
g<-g[order(g$StartBP),]
starts<-as.numeric(g[,"StartBP"])
stops<-as.numeric(g[,"StopBP"])
rect(xleft=as.numeric(starts),xright=as.numeric(stops),
ybottom=-0.04,ytop=1,col="indianred",border="indianred")
text(x=g$StartBP,y=0.6,labels=g$Gene,font=2,srt=90,xpd=T,cex=1.75)
# text(x=g$StartBP[1]-20000,y=0.6,labels=g$Gene[1],font=2,srt=90,xpd=T,cex=1.75)
# text(x=g$StartBP[2]-20000,y=0.6,labels=g$Gene[2],font=2,srt=90,xpd=T,cex=1.75)
# text(x=g$StartBP[3]-10000,y=0.6,labels=g$Gene[3],font=2,srt=90,xpd=T,cex=1.75)
# text(x=g$StartBP[4]+10000,y=0.6,labels=g$Gene[4],font=2,srt=90,xpd=T,cex=1.75)
# text(x=g$StartBP[5],y=0.6,labels=g$Gene[5],font=2,srt=90,xpd=T,cex=1.75)
mtext(chrom,1,cex=2*0.75,line=2)
for(i in 1:length(fst.dfs)){
points(fst.dfs[[i]]$BP[fst.dfs[[i]]$Chr %in% chrom],
fst.dfs[[i]]$Smoothed.AMOVA.Fst[fst.dfs[[i]]$Chr %in% chrom],
col=alpha(colors[i],alphas[i]),type="l",lwd=lwds[i],lty=ltys[i])
}
points(dd.smooth$x,dd.smooth$y,col="#dfc27d",type="l",lwd=2)
points(dsmooth$x,dsmooth$y,type="l",col="#a6611a",lwd=2)
clip(x1=min(as.numeric(0)),x2=max(as.numeric(3500000)),y1=0.99,y2=1.06)
abline(v=fst.gene,col="black")
points(x=sal.gene,y=rep(1.05,length(sal.gene)),col="black",pch=25,cex=1)
#abline(v=nb.gene,col=alpha("black",0.5),lwd=1.5)
###Recombination rate
load("~/Projects/ssc_map.rda")
#plot LG4
plot(set[["LG4"]]@interpolations$slidingwindow@physicalPositions,
set[["LG4"]]@interpolations$slidingwindow@rates,type="l",
ylim=c(-5,25),xlab="",ylab="",xaxt='n',lwd=2)
abline(h=0,lty=2)
axis(1,at=seq(0,1.8*10^7,3*10^6),labels = seq(0,18,3))
mtext("Position on LG4 (Mb)",1,line=2)
mtext("Recombination Rate (cM/Mb)",2,line=2)
lines(set[["LG4"]]@interpolations$spline@physicalPositions,
set[["LG4"]]@interpolations$spline@rates,type="l",col="blue",lwd=2)
lines(set[["LG4"]]@interpolations$loess@physicalPositions,
set[["LG4"]]@interpolations$loess@rates,type="l",lwd=2,col="forestgreen")
legend("topleft",bty='n',col=c("black","blue","forestgreen"),lwd=2,c("Sliding Window","Spline","Loess"))
###add legend
lgnd<-unlist(lapply(names(fst.dfs),function(n) { bquote(.(n)~italic(F[ST])) }))
lgnd<-c(lgnd,expression(delta~-divergence),"Jost's D","gene regions",
expression("Shared"~italic(F[ST])~"outlier"),"Salinity-associated")
pchs<-c(rep(32,6),15,124,25)
cols<-c(grp.colors[1],grp.colors[2],grp.colors[3],grp.colors[6],
"#dfc27d","#a6611a","indianred","black","black")
ltys=c(1,1,2,1,1,1,0,0,0)
lwds<-c(rep(2,6),0,0,0)
plot(c(0,1),c(0,1),bty='n',type='n',xlab="",ylab="",xaxt='n',yaxt='n')
legend("topright",legend=lgnd,pch=pchs,
bty='n',lwd=lwds,lty=ltys,
col=cols,xpd=TRUE,cex=1.5)
dev.off()
LG 4 Figure